First, fit a linear regression directly from the normal equation, $\hat{\beta} = (X^T X)^{-1} X^T y$


In [19]:
import numpy as np
import matplotlib.pyplot as plt

In [7]:
# Design matrix for the normal-equation fit. The leading 1 in every row is a
# constant column, so the first fitted coefficient acts as the intercept.
# The second column holds the same values that are later plotted as pizza
# diameters; the meaning of the third column is not stated in this notebook
# (TODO confirm).
X = [
    [1, 6, 2],
    [1, 8, 1],
    [1, 10, 0],
    [1, 14, 2],
    [1, 18, 0],
]
# Targets: one single-element row per training example.
y = [
    [7],
    [9],
    [13],
    [17.5],
    [18],
]

In [8]:
# Solve the normal equation (X^T X) beta = X^T y for the coefficient vector.
# np.linalg.solve is used instead of multiplying by an explicit inverse of
# X^T X: it yields the same coefficients without forming the inverse, which
# is the numerically safer route.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(np.linalg.solve(
    np.dot(np.transpose(X), X),
    np.dot(np.transpose(X), y),
))


[[ 1.1875    ]
 [ 1.01041667]
 [ 0.39583333]]

Linear regression using NumPy's least-squares solver


In [10]:
# Least-squares fit of y on X; element [0] of the lstsq result is the
# solution (the coefficient vector).
# print(...) with a single argument behaves the same on Python 2 and 3.
print(np.linalg.lstsq(X, y)[0])


[[ 1.1875    ]
 [ 1.01041667]
 [ 0.39583333]]

Linear regression using sklearn


In [11]:
from sklearn.linear_model import LinearRegression

In [15]:
# Training inputs for the scikit-learn fit. Unlike the earlier design matrix,
# there is no constant column here; X and y are rebound for this section.
X = [
    [6, 2],
    [8, 1],
    [10, 0],
    [14, 2],
    [18, 0],
]
y = [
    [7],
    [9],
    [13],
    [17.5],
    [18],
]
# Held-out examples used only for prediction and scoring.
X_test = [
    [8, 2],
    [9, 0],
    [11, 2],
    [16, 2],
    [12, 0],
]
y_test = [
    [11],
    [8.5],
    [15],
    [18],
    [11],
]

In [17]:
# Linear regression estimator with default settings.
model = LinearRegression()
# Kept as the cell's last expression so the fitted estimator is echoed
# as the cell output.
model.fit(X=X, y=y)


Out[17]:
LinearRegression(copy_X=True, fit_intercept=True, normalize=False)

In [18]:
# Predict on the held-out inputs, print each prediction next to its target,
# then report the R-squared score of the model on the test set.
# print(...) with a single argument behaves the same on Python 2 and 3.
predictions = model.predict(X_test)
for prediction, target in zip(predictions, y_test):
    print('Predicted: %s, Target: %s' % (prediction, target))
print('R-squared: %.2f' % model.score(X_test, y_test))


Predicted: [ 10.0625], Target: [11]
Predicted: [ 10.28125], Target: [8.5]
Predicted: [ 13.09375], Target: [15]
Predicted: [ 18.14583333], Target: [18]
Predicted: [ 13.3125], Target: [11]
R-squared: 0.77

Polynomial regression


In [20]:
from sklearn.preprocessing import PolynomialFeatures

In [21]:
# Single-feature training set for the polynomial fit. The plot further down
# labels these inputs as diameters in inches and the targets as prices in
# dollars.
X_train = [
    [6],
    [8],
    [10],
    [14],
    [18],
]
y_train = [
    [7],
    [9],
    [13],
    [17.5],
    [18],
]
# Held-out examples; note that this rebinds X_test / y_test from the
# previous section.
X_test = [
    [6],
    [8],
    [11],
    [16],
]
y_test = [
    [8],
    [12],
    [15],
    [18],
]

In [23]:
# Expand the single input feature into degree-2 polynomial features.
quadratic_featurizer = PolynomialFeatures(degree=2)
# Fit on the training inputs only, then apply the same fitted transformer
# to both the training and the test inputs.
quadratic_featurizer.fit(X_train)
X_train_quadratic = quadratic_featurizer.transform(X_train)
X_test_quadratic = quadratic_featurizer.transform(X_test)

In [28]:
# Fit a linear regression on the degree-2 features of the training inputs.
regressor_quadratic = LinearRegression()
regressor_quadratic.fit(X_train_quadratic, y_train)

# 100 evenly spaced inputs from 0 to 26, reshaped into a single-feature
# column so the already-fitted featurizer can transform them.
xx = np.linspace(0, 26, 100)
xx_quadratic = quadratic_featurizer.transform(xx.reshape(-1, 1))

# Fitted quadratic curve as a dashed red line.
plt.plot(
    xx,
    regressor_quadratic.predict(xx_quadratic),
    color='r',
    linestyle='--',
)

# Figure decoration: fixed 0-25 window on both axes, grid, title and labels.
plt.axis([0, 25, 0, 25])
plt.grid(True)
plt.title('Pizza price regressed on diameter')
plt.xlabel('Diameter in inches')
plt.ylabel('Price in dollars')

# Training points drawn on top of the fitted curve.
plt.scatter(X_train, y_train)
plt.show()

In [ ]: